We will examine the impact of COVID-19 on the USA. Let's load our libraries first.
library(plotly)
library(lubridate)
library(tidyr)
library(dplyr)
Load our dataset of state-wise COVID-19 infection data.
# Read the daily per-state CSV into a data frame (path is relative to the
# working directory).
df <- read.csv(file = "data/us_states_covid19_daily.csv")
We need the date, the state, and the number of positive cases, along with the hospitalized and totalTestsViral columns.
# Columns kept for the rest of the analysis.
req_cols <- c("date", "state", "positive", "hospitalized", "totalTestsViral")
df_pos <- df[req_cols]
# Replace every missing cell with 0.
df_pos <- replace(df_pos, is.na(df_pos), 0)
# Report how many NA values remain. The sum runs over all cells of the data
# frame, even though the label says "rows".
print(paste("number of NA rows:", sum(is.na(df_pos))))
## [1] "number of NA rows: 0"
Convert the date column to the “Date” data type.
# Parse the yyyymmdd values into Date objects. The values are converted to
# character first and parsed with an explicit format; no `origin` is passed
# because that argument is only meaningful when converting numeric day
# offsets, not when parsing strings.
df_pos$date <- as.Date(as.character(df_pos$date), format = "%Y%m%d")
# Confirm the column now has class "Date".
class(df_pos$date)
## [1] "Date"
# Line chart of positive counts over time, one coloured line per state.
# The trace type is stated explicitly so plotly does not have to infer it
# (and does not emit its "No trace type specified" message).
plot_ly(
  x = df_pos$date,
  y = df_pos$positive,
  color = factor(df_pos$state),
  type = "scatter",
  mode = "lines"
) %>%
  layout(title = "USA COVID-19 trends from JAN to DEC 2020")
Since our plot looks messy, we can try to find out which states are doing the worst.
# Store the state codes as a factor so they can serve as a grouping variable.
df_pos[["state"]] <- as.factor(df_pos[["state"]])
# Confirm the column now has class "factor".
class(df_pos[["state"]])
## [1] "factor"
# Group the rows by state, then add up each measure within every state.
# NOTE(review): summing every daily row assumes the columns hold per-day
# increments. If they are running cumulative counts, sum() overstates the
# totals and max() would be the right aggregate -- confirm against the
# dataset description.
df_state <- df_pos %>%
  group_by(state)
new_df <- df_state %>%
  summarise(
    Total_positive_cases = sum(positive),
    Total_Hospitalized = sum(hospitalized),
    Total_testing = sum(totalTestsViral)
  ) %>%
  data.frame()
# Preview the first rows of the per-state summary.
head(new_df)
## state Total_positive_cases Total_Hospitalized Total_testing
## 1 AK 1656760 55545 78690463
## 2 AL 24282758 2622751 0
## 3 AR 13422076 861542 159758326
## 4 AS 0 0 249791
## 5 AZ 35894317 3215012 0
## 6 CA 126643296 0 2310461373
# Reorder the per-state summary from the largest to the smallest positive
# total; the original row names are kept.
sorted_df <- new_df[
  with(new_df, order(Total_positive_cases, decreasing = TRUE)),
]
# Preview the states at the top of the ranking.
head(sorted_df)
## state Total_positive_cases Total_Hospitalized Total_testing
## 6 CA 126643296 0 2310461373
## 48 TX 114486497 0 1014046744
## 11 FL 106829756 7070416 1178962691
## 38 NY 104056991 21377064 0
## 17 IL 59718277 0 906112502
## 12 GA 48555886 4667944 455023678
Top 10 states with the highest number of positive cases.
# Keep the first ten rows of the ranking. head() returns at most ten rows, so
# a summary with fewer than ten states is not padded with all-NA rows the way
# indexing with 1:10 would pad it.
top_10 <- head(sorted_df, n = 10)
# Preview the selection.
head(top_10)
## state Total_positive_cases Total_Hospitalized Total_testing
## 6 CA 126643296 0 2310461373
## 48 TX 114486497 0 1014046744
## 11 FL 106829756 7070416 1178962691
## 38 NY 104056991 21377064 0
## 17 IL 59718277 0 906112502
## 12 GA 48555886 4667944 455023678
# State codes of the top-ten selection, as plain character strings.
state_name <- as.character(top_10$state)
# Turn the state column back into character before matching against the codes.
df_pos$state <- as.character(df_pos$state)
# Keep only the daily rows that belong to the selected states.
new_df_pos <- df_pos[df_pos$state %in% state_name, ]
# Line chart restricted to the selected states. The trace type is stated
# explicitly so plotly does not have to infer it (and does not emit its
# "No trace type specified" message).
plot_ly(
  x = new_df_pos$date,
  y = new_df_pos$positive,
  color = factor(new_df_pos$state),
  type = "scatter",
  mode = "lines"
) %>%
  layout(title = "Top 10 US states COVID-19 trends from JAN to DEC 2020")
Choropleth map of the USA depicting the total number of positive cases per state.
# Per-state table that feeds the map: the state code plus its summed positive
# count from new_df.
# NOTE(review): the mapped value is Total_positive_cases, not a rate, although
# the plot title below says "positivity".
state_pop <- data.frame(
  State = new_df$state,
  Positive_case = new_df$Total_positive_cases
)
# Hover label: state code, an HTML line break, then the positive count.
state_pop$hover <- paste(
  state_pop$State, "<br>", "positive_cases:", state_pop$Positive_case
)
# Red outline drawn around every state.
borders <- list(color = toRGB("red"))
# Geo settings: USA scope, Albers USA projection, lakes drawn in white.
map_options <- list(
  scope = "usa",
  projection = list(type = "albers usa"),
  showlakes = TRUE,
  lakecolor = toRGB("white")
)
# Choropleth keyed by state code, shaded by the positive count.
plot_ly(
  state_pop,
  z = state_pop$Positive_case,
  text = state_pop$hover,
  locations = state_pop$State,
  type = "choropleth",
  locationmode = "USA-states",
  color = state_pop$Positive_case,
  colorscale = "Reds",
  marker = list(line = borders)
) %>%
  layout(title = "USA COVID-19 positivity", geo = map_options)